###########################################################
# Normalization of Censorship: Evidence from China
# Replication Files (Observational Study, Main Paper)
#
# Author: Tony Zirui Yang
# Date: January 24, 2024
#
# The lines below reproduce the main Figures and Tables
# related to the observational study reported in the paper
###########################################################

# The global workspace is intentionally left untouched: `rm(list = ls())`
# deletes the caller's objects but does not reset loaded packages or options,
# so it does not provide a clean slate. Every object used below is created by
# this script itself; run it in a fresh R session for a reproducible run.

# Packages ----
library("dplyr")
library("ggplot2")
library("data.table")

# Read the three censorship data sets. The paths are relative, so the CSV
# files must be located in the current working directory.
FreeWeChat  <- fread(input = "FreeWeChatResults.csv")
WeChatScope <- fread(input = "WeChatScopeResults.csv")
WeiboScope  <- fread(input = "WeiboScopeResults.csv")

## ------------------------------
## Table 1
## ------------------------------

# Raw count of each category divided by the total number of observations,
# computed separately for each of the three censorship data sources.

# Display label and broad political grouping of the nine category columns,
# listed in the order in which the columns are read (columns 3 to 11).
# NOTE(review): labels are attached to columns purely by position; confirm
# that all three CSV files share this column order.
category_labels <- c("Advertisement", "Business", "Collective Action",
                     "Govt Criticism", "Entertainment", "Foreign",
                     "Other Govt-related", "Culture", "Others")
general_category_labels <- c("Non-Political", "Moderately-Political",
                             "Highly-Political", "Highly-Political",
                             "Non-Political", "Moderately-Political",
                             "Highly-Political", "Non-Political",
                             "Non-Political")

# Summarise one censorship data set into a per-category table.
#
# Arguments:
#   censor_data    Table whose columns are all numeric and whose
#                  `category_cols` columns hold the category counts.
#   category_cols  Positions of the nine category count columns.
#
# Returns a data.frame with one row per category and the columns
#   Label (source column name), Category, General_Category,
#   Sum (column total) and Percentage (share of all categories, 2 decimals).
build_category_table <- function(censor_data, category_cols = 3:11) {
  stopifnot(
    length(category_cols) == length(category_labels),
    ncol(censor_data) >= max(category_cols)
  )
  # Column totals are computed once and reused for Sum and Percentage.
  counts <- colSums(censor_data)[category_cols]
  data.frame(
    Label = names(counts),  # was misspelled "Laebl"; not referenced by name
    Category = category_labels,
    General_Category = general_category_labels,
    Sum = counts,
    Percentage = round(counts / sum(counts) * 100, 2)
  )
}

# FreeWeChat censorship data
FreeWeChat_Table <- build_category_table(FreeWeChat)

# WeChatScope censorship data
WeChatScope_Table <- build_category_table(WeChatScope)

# WeiboScope censorship data
WeiboScope_Table <- build_category_table(WeiboScope)


# Merge the per-category percentages of the three censorship data sources
# into one table, then lay the rows out as in Table 1 of the paper.
merge_keys <- c("Category", "General_Category")
source_tables <- list(
  WeChatScope = WeChatScope_Table,
  FreeWeChat = FreeWeChat_Table,
  WeiboScope = WeiboScope_Table
)
source_names <- names(source_tables)

# Keep only the merge keys and the percentage of each source, and name the
# percentage column after its source *before* merging. Selecting and naming
# by column name avoids relying on column positions and merge suffixes.
percentage_tables <- Map(
  function(source_table, source_name) {
    setNames(source_table[, c(merge_keys, "Percentage")],
             c(merge_keys, source_name))
  },
  source_tables, source_names
)
Merged_Table <- Reduce(
  function(left, right) merge(left, right, by = merge_keys, all = TRUE),
  percentage_tables
)
Merged_Table <- Merged_Table[, c("General_Category", "Category", source_names)]

# One table row (as character) for a single category; fails loudly if the
# category does not match exactly one row of Merged_Table.
category_row <- function(category) {
  matched <- Merged_Table[Merged_Table$Category == category, , drop = FALSE]
  stopifnot(nrow(matched) == 1L)
  vapply(matched, as.character, character(1), USE.NAMES = FALSE)
}

# "Total" row of one general category: the sum of the (rounded) percentages
# of its member categories, per data source.
total_row <- function(general_category) {
  members <- Merged_Table$General_Category == general_category
  c(general_category, "Total",
    colSums(Merged_Table[members, source_names, drop = FALSE]))
}

# Row layout of Table 1: each general category lists its categories in
# display order and is closed by its "Total" row.
table_layout <- list(
  "Highly-Political" = c("Collective Action", "Govt Criticism",
                         "Other Govt-related"),
  "Moderately-Political" = c("Business", "Foreign"),
  "Non-Political" = c("Entertainment", "Advertisement", "Culture", "Others")
)
table_rows <- unlist(
  lapply(names(table_layout), function(general_category) {
    c(lapply(table_layout[[general_category]], category_row),
      list(total_row(general_category)))
  }),
  recursive = FALSE
)

Table.1 <- do.call(rbind, table_rows)
dimnames(Table.1) <- list(NULL, colnames(Merged_Table))
# Append the percent sign to the three data-source columns.
Table.1[, source_names] <- paste0(Table.1[, source_names], "%")

Table.1


## ------------------------------
## Figure 1
## ------------------------------

# Collapse the nine categories into two groups and compute, for every row of
# each data source, the share of each group in the row total of columns 3-11.
political_cols <- c("COL", "GOV", "CRI")
non_political_cols <- c("ADS", "BET", "ESX", "FOR", "LCT", "OTH")

# Row-wise sum of the columns `cols`, divided by the row-wise sum of the
# category columns (positions 3 to 11) of the data.table `censor_data`.
category_share <- function(censor_data, cols) {
  rowSums(censor_data[, cols, with = FALSE]) / rowSums(censor_data[, 3:11])
}

# The new columns are appended after column 11, so the 3:11 denominator is
# unaffected when the second share is computed.
FreeWeChat$Political <- category_share(FreeWeChat, political_cols)
FreeWeChat$NonPolitical <- category_share(FreeWeChat, non_political_cols)

WeChatScope$Political <- category_share(WeChatScope, political_cols)
WeChatScope$NonPolitical <- category_share(WeChatScope, non_political_cols)

WeiboScope$Political <- category_share(WeiboScope, political_cols)
WeiboScope$NonPolitical <- category_share(WeiboScope, non_political_cols)

# Merge the monthly shares of the three censorship data sources.
# Columns are selected and renamed by name (not by position), so extra
# columns in a CSV or a different merge-suffix order cannot silently pull
# the wrong data into the figure.
period_keys <- c("Year", "Month")

# Year, Month and the two share columns of one data source, with the share
# columns renamed to Pol_<abbr> / Non_<abbr>. The subset is a new data.table,
# so renaming it does not touch the input.
source_shares <- function(censor_data, source_abbr) {
  shares <- censor_data[, c(period_keys, "Political", "NonPolitical"),
                        with = FALSE]
  setnames(shares, c("Political", "NonPolitical"),
           paste0(c("Pol_", "Non_"), source_abbr))
  shares
}

# Full outer join on Year and Month, so a month missing from one source is
# kept with NA for that source.
Merged_Censor_Data <- Reduce(
  function(left, right) merge(left, right, by = period_keys, all = TRUE),
  list(source_shares(FreeWeChat, "FWC"),
       source_shares(WeChatScope, "WCS"),
       source_shares(WeiboScope, "WBS"))
)

# Average share across the data sources available in each month
# (na.rm = TRUE skips sources with no value for that month).
Merged_Censor_Data$Ave_Pol <- rowMeans(
  Merged_Censor_Data[, c("Pol_FWC", "Pol_WBS", "Pol_WCS")], na.rm = TRUE
)
Merged_Censor_Data$Ave_Non <- rowMeans(
  Merged_Censor_Data[, c("Non_FWC", "Non_WBS", "Non_WCS")], na.rm = TRUE
)


# Build the long-format data frame "Figure.1" used for plotting: the months
# are stacked twice, first with the average political share and then with
# the average non-political share.
n_months <- nrow(Merged_Censor_Data)
Year_Month <- sprintf("%04d-%02d",
                      Merged_Censor_Data$Year,
                      Merged_Censor_Data$Month)

Censored_Month <- c(Year_Month, Year_Month)
Percentage <- with(Merged_Censor_Data, c(Ave_Pol, Ave_Non))
Category <- rep(
  c("Collective Actions &\nGovernment-related Content",
    "Other Moderately Political &\nNon-Political Content"),
  each = n_months
)

Figure.1 <- data.frame(
  Censored_Month = Censored_Month,
  Percentage = Percentage,
  Category = Category
)

# Plot the figure
#
# All decorations (year labels, event labels, arrows) are added with
# annotate() instead of geom_*(aes(<constants>)): each one is then drawn
# exactly once rather than once per data row, and it does not inherit the
# Category colour/linetype/shape mappings of the data layers.
# Numeric x positions are indices along the discrete month axis.

# Year labels printed along the top of the panel.
year_labels <- data.frame(
  x = c(3, 12.5, 24.5, 36.5, 48.5, 60.5, 72.5),
  label = c("2016", "2017", "2018", "2019", "2020", "2021", "2022")
)

# Event labels.
event_labels <- data.frame(
  x = c(53, 22, 74, 15, 40, 65),
  y = c(0.55, 0.58, 0.55, 0.40, 0.52, 0.43),
  label = c("COVID-19\nOutbreak",
            "Huawei CFO Meng Wanzhou\nArrested in Canada",
            "End of\nZero-COVID",
            "US-China\nTrade War",
            "Hong Kong\nProtest",
            "Nancy Pelosi\nVisits Taiwan")
)

# Arrows: start point, end point and curvature of each curve.
event_arrows <- data.frame(
  x = c(29, 19, 40.5, 52, 70, 75),
  y = c(0.53, 0.39, 0.48, 0.50, 0.41, 0.50),
  xend = c(33, 24, 42.5, 49, 74, 79),
  yend = c(0.46, 0.34, 0.42, 0.45, 0.40, 0.48),
  curvature = c(0.1, -0.1, 0.1, -0.2, 0.2, 0.2)
)

# `curvature` is a per-layer parameter, so each arrow gets its own layer.
# `linewidth` (ggplot2 >= 3.4.0) replaces the deprecated `size` for lines.
arrow_layers <- lapply(seq_len(nrow(event_arrows)), function(i) {
  annotate("curve",
           x = event_arrows$x[i], y = event_arrows$y[i],
           xend = event_arrows$xend[i], yend = event_arrows$yend[i],
           colour = "black",
           linewidth = 0.15,
           curvature = event_arrows$curvature[i],
           arrow = arrow(length = unit(0.02, "npc")))
})

Figure_1_Plot <- ggplot(Figure.1,
                        aes(x = Censored_Month, y = Percentage,
                            group = Category, color = Category,
                            linetype = Category, shape = Category)) +
  geom_point() +  # Point shape follows Category (inherited mapping)
  geom_line() +   # Line type follows Category; lines have no shape aesthetic
  theme_classic() +
  theme(axis.text.x = element_text(face = "bold",
                                   size = 11, angle = 40, hjust = 1),
        axis.text.y = element_text(face = "bold",
                                   size = 12),
        axis.title.x = element_text(size = 14, face = "bold"),
        axis.title.y = element_text(size = 14, face = "bold"),
        legend.text = element_text(size = 10, face = "bold"),
        legend.title = element_text(size = 10, face = "bold"),
        legend.position = "bottom") +  # Move the legend below the figure
  scale_color_manual(values = c("red", "blue")) +  # Line colors: red and blue
  # Show an x-axis label for every fourth month only
  scale_x_discrete(breaks = Year_Month[seq(1, length(Year_Month), by = 4)]) +
  scale_y_continuous(breaks = seq(0.2, 0.8, 0.1)) +
  # Dashed grey separators every 12 months, starting at position 9.5
  geom_vline(xintercept = seq(9.5, 69.5, by = 12),
             linetype = "dashed", color = "grey") +
  annotate("text", x = year_labels$x, y = 0.9, label = year_labels$label,
           colour = "black") +
  labs(y = "Proportion of All Censored Articles", x = "") +
  annotate("text", x = event_labels$x, y = event_labels$y,
           label = event_labels$label, size = 4, colour = "black") +
  arrow_layers

# Display the figure (explicit print so it is also shown under source()).
print(Figure_1_Plot)

# Save the figure
output_dir <- "Figures_Paper"
# Create the output directory if it does not exist yet; otherwise saving fails.
dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)
ggsave(filename = file.path(output_dir, "Figure_01.pdf"),  # Output file path
       plot = Figure_1_Plot,  # Save this plot explicitly, not last_plot()
       width = 10,  # The width of the plot in inches
       height = 6)  # The height of the plot in inches



